/************************************  
DESCRIPTION:  !!!! ESTIMATE THE PROPENSITY SCORE !!!!
 !!!!! CREATE MATCHED SAMPLE FROM "POTENTIAL CONTROLS" !!!!

 Also adds race, birthdate, and ESRD information to the claims information.
 Want to drop the ESRD beneficiaries. Want to get good race variables.
 
Datasets used:
 (1) `yr'/mbsf_ab_summary
 (2) Bene_doc_match
 (3) Bene_matchdata 
 (4) BeneLvl_limitDemogs05-12

Datasets created:
 (1) BeneLvl_limitDemogs05-12
 (2) PSPats_lor1
 (3) PSPats_logitpre
 (4) PSPats_multi_matches
 (5) PSPats_logit_final
 (6) PSPats_logit_list

************************************/  

*SESSION SETUP
*Turn off paging, close any log left open by an earlier run, and empty the session
set more off
capture log close
clear all

*The logits estimated later in this file carry a long regressor list
set matsize 10000

*DIRECTORY GLOBALS
*Only origData, dataOut and logs are referenced in this file; the others are defined but not used here
global origData "N:\MedicareClaims-P045601-BE"
global dataIn "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-In\"
global dataOut "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out"
global dataProp "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out\PropScore\Patients"
global logs "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Logs\PropScore\Patients"
*dpath was previously defined twice with the same value; one definition is enough
global dpath "N:\MedicareClaims-P045601-BE\Work\ay_data"
global skapath "N:\MedicareClaims-P045601-BE\Work\ska"
*Defined with no contents, which leaves the output global empty
global output

*User-written packages are loaded from the shared drive
adopath +  "N:/SIL-Common/estout"
adopath +  "N:/SIL-Common/outreg2"
adopath +  "N:/SIL-Common/reghdfe-master/package"

log using "$logs/3b.PS_patlogit.log", replace

*PATIENT RACE AND ESRD
*Stack the 2005-2012 beneficiary summary files, reading only the demographic fields needed below
	local mbsfvars BENE_ID RTI_RACE_CD BENE_ESRD_IND BENE_SEX_IDENT_CD FIVE_PERCENT_FLAG BENE_BIRTH_DT BENE_MDCR_STATUS_CD
	use `mbsfvars' using "$origData/Work/2005/mbsf_ab_summary.dta"
	forvalues mbsfyr = 2006/2012 {
		append using "$origData/Work/`mbsfyr'/mbsf_ab_summary.dta", keep(`mbsfvars')
	}

*End-stage renal disease
*esrd = 1 when the beneficiary has an ESRD indicator of "Y" in any year.
*Note that nothing is dropped on esrd in this section; the flag is only carried on the saved file.
	gen ESRD = (BENE_ESRD_IND == "Y")
	bysort BENE_ID: egen esrd = max(ESRD)
	drop BENE_ESRD_IND ESRD
	duplicates drop
	sort BENE_ID

*RACE
*Use RTI-race since it has finer race categories
*RTI race changed over the course of the sample, so a beneficiary can have several codes.
*When the lowest code on record is 0 (unknown per the code list later in this file) and the beneficiary
*still has more than one row, every row takes the highest code on record.
	duplicates tag BENE_ID, gen(extrarows)
	destring RTI_RACE_CD, replace force
	egen race_lo = min(RTI_RACE_CD), by(BENE_ID)
	egen race_hi = max(RTI_RACE_CD), by(BENE_ID)
	replace RTI_RACE_CD = race_hi if race_lo == 0 & extrarows > 0
	drop extrarows race_hi race_lo
	duplicates drop

*Beneficiaries that still have more than one row at this point are removed entirely (all of their rows)
	bysort BENE_ID: drop if _N > 1

*Person-level dataset
	sort BENE_ID
	save "$dataOut/BeneLvl_limitDemogs05-12.dta", replace
	clear

*Person-doctor level dataset
*Re-save the file sorted by BENE_ID so it is ready for the merge below
use "$dataOut/Bene_doc_match.dta", clear
	sort BENE_ID
save "$dataOut/Bene_doc_match.dta", replace
clear

*PATIENT CHARACTERISTICS  
	*This is hard since want to have patient level information for claims-level dataset spanning multiple years.
	*Putting doctor information on patient-level dataset
	*Also, putting claims level information (for example, count of claims) on person level dataset
	*Using doctor and claims-level information in our propensity score

*Note: there are many bene_ids that do not merge
	*The vast majority are young (90% are under 65 in 2005); of the older Medicare patients, 90% are not part of the 5% sample (to check this, I kept the 5% sample variable from mbsf)
	*Some (not as many) are ESRD and/or not in the "strict" five percent Medicare sample
	*Checked using birth date


*Patient level dataset
*Step 1: collapse the match data to one row per BENE_ID / STATE* / ZIP* combination,
*        summing claims and taking the maximum of every other listed variable.
*        (clear is the documented option of use; replace is not.)
use "$dataOut/Bene_matchdata.dta", clear
 collapse (sum) claims (max) pop_density mxicd* mxdiab* mxhyp* merger* RACE* urban* SEX* hospbedsz* BENE_BIRTH,  by(BENE_ID STATE* ZIP* )

*Step 2: attach the person-doctor records (several per beneficiary) and remember where each row came from
sort BENE_ID
merge 1:m BENE_ID using "$dataOut/Bene_doc_match.dta"
rename _merge matchVar
capture drop tag 

*Step 3: collapse back to the patient level, now including the doctor-side variables.
*        matchVar is kept as a maximum, so a value of 2 means the beneficiary only appears in the doctor file.
 collapse (max) claims pop_density mxicd* mxdiab* mxhyp* merger RACE* urban* firmsize fs* ps* doc_Cnt* hospbedsz* BENE_BIRTH SEX* matchVar,  by(BENE_ID STATE* ZIP* )
*Lower-case name keeps this birth date distinct from BENE_BIRTH_DT arriving in the next merge
 rename BENE_BIRTH bene_birth_dt

*Step 4: add race, sex, birth date and the ESRD flag; keep only beneficiaries present in the match data
	sort BENE_ID
	merge 1:1 BENE_ID using "$dataOut/BeneLvl_limitDemogs05-12.dta"
	 drop if _merge==2
	 drop if matchVar==2
	 drop _merge

*RTI race 0 unknown 1 white 2 black 3 other 4 asian 5 hispanic 6 native american
*BASE 0 unknown 1 white 2 black 3 other 4 asian 5 hispanic 6 native american
*Race and birth date come from the summary file when available, otherwise from the match data
	gen race = RTI_RACE_CD
	 replace race = RACE1 + 2*RACE2 + 3*RACE3 + 4*RACE4 + 5*RACE5 + 6*RACE6 if race==.
	 replace BENE_BIRTH_DT =bene_birth_dt if BENE_BIRTH_DT==.
*Sex code is rebuilt from the SEX1 and SEX2 indicators for every row.
*The rest of this file treats BENE_SEX_IDENT_CD as a string (it is compared with "1" and passed to destring),
*so the code is stored as a string here; assigning the bare numeric expression to a string variable is a type mismatch.
*If the variable happens to be numeric on disk it is converted first so the assignment works either way.
	capture confirm numeric variable BENE_SEX_IDENT_CD
	if _rc==0 {
		tostring BENE_SEX_IDENT_CD, replace
	}
	 replace BENE_SEX_IDENT_CD = string(SEX1 + 2*SEX2)


*Residence: first non-missing ZIP and state across 2005-2012.
*mover and movefar flag a later year whose 5-digit or 3-digit ZIP differs from that first value.
	gen ZIP5=substr(ZIP2005,1,5)
	gen ZIP3=substr(ZIP2005,1,3)
	gen mover=0 
	gen movefar=0 
	gen STATE=STATE2005

	forval i=2006(1)2012 {
		replace ZIP5=substr(ZIP`i',1,5) if ZIP5==""
		replace ZIP3=substr(ZIP`i',1,3) if ZIP3==""
		replace mover=1 if ZIP5~="" & ZIP`i'~="" & substr(ZIP`i',1,5)~=ZIP5 
		replace movefar=1 if ZIP3~="" & ZIP`i'~="" & substr(ZIP`i',1,3)~=ZIP3
		replace STATE=STATE`i' if STATE==""
	}

	encode STATE, gen(state)
	encode ZIP3, gen(zip3)
	drop STATE ZIP3

*How many doctors did the patient see?	
*Note: a missing doc_Cnt compares as larger than any number, so it lands in visitgt50
	gen visitlt5=doc_Cnt<5
	gen visit5_10=doc_Cnt>=5 & doc_Cnt<10
	gen visit10_20=doc_Cnt>=10 & doc_Cnt<20
	gen visit20_30=doc_Cnt>=20 & doc_Cnt<30
	gen visit30_50=doc_Cnt>=30 & doc_Cnt<50
	gen visitgt50=doc_Cnt>=50
	gen lnvisit=log(doc_Cnt)
	gen lnvisitsq=log(doc_Cnt)*log(doc_Cnt)

gen lnclaim=log(claims)
gen lnclaimsq=log(claims)*log(claims)


*State dummies and their interaction with population density
	levelsof state, local(lstate)
	foreach i of local lstate {
		gen state`i'=state==`i' 
		gen state`i'_pop=pop_density*state`i' 
	}

*Age in whole years on 1 January 2005
	gen age=round((mdy(1,1,2005)-BENE_BIRTH_DT)/365.25,1)
	tab age
 
*Observed during the year
	gen yr2005=STATE2005~=""
	gen yr2006=STATE2006~=""
	gen yr2007=STATE2007~=""
	gen yr2008=STATE2008~=""
	gen yr2009=STATE2009~=""
	gen yr2010=STATE2010~=""
	gen yr2011=STATE2011~=""
	gen yr2012=STATE2012~=""

*NOTE(review): this is log(pop_density) plus one, not log(pop_density + 1) - confirm which was intended.
*lndense is not used in the logits in this file, so the expression is left as it was.
	gen lndense = log(pop_density) +1

*Family practice
	gen fmp=psps08
	 replace fmp=1 if psps01==1
	 replace fmp=1 if psps11==1
	 replace fmp=1 if psps93==1
 
*Note: we delete this and then respecify
 	egen SPEC=group(fmp psps02 psps04 psps05 psps06 psps07 psps10 psps13 psps16 psps18 psps20 psps22 psps25 psps26 psps29 psps30 psps34 psps39 psps41 psps48 psps83  )
*Five-year age bands; a missing age compares as large and therefore falls in band 7
	gen agecat=1 if age<70
	 replace agecat=2 if age>=70 & age<75
	 replace agecat=3 if age>=75 & age<80
	 replace agecat=4 if age>=80 & age<85
	 replace agecat=5 if age>=85 & age<90
	 replace agecat=6 if age>=90 & age<95
	 replace agecat=7 if age>=95  

gen agesq=age*age

*From the area resource files
*Keep the creation order of these variables: a later step drops the positional range ZIP2005-urbanrural_3
	gen urban=urbanrural1==1 | urbanrural2==1
	gen urbanrural_1=urbanrural1==1
	gen urbanrural_2=urbanrural2==1
	gen urbanrural_3=urbanrural3==1
	egen urbanrural_4=rowtotal(urbanrural4-urbanrural10)
	gen urbanrural_5=urbanrural1==.

*WOMEN IS SEX ==2
capture drop male
gen male = BENE_SEX_IDENT_CD =="1"

summ fs*

*don't want outcome variables in my propensity score definition
rename mxicd_ihd icd_ihd
rename mxicd_ami icd_ami
rename mxicd_glaucoma icd_glaucoma

*PROPENSITY SCORE MODEL
*The same specification is estimated separately for men and women; merger is the treatment indicator.
*Wildcards in the list are resolved when each logit runs.
local psmodel merger mover movefar visit* lnvisit lnvisitsq mxicd*chronic* mxicd1_acute mxicd5_acute mxicd18_acute mxhypertension mxdiabetes i.agecat age agesq i.race urbanrural_* pop_density i.state#urban fs* yr* fmp psps02 psps04 psps05 psps06 psps07 psps10 psps13 psps16 psps18 psps20 psps22 psps25 psps26 psps29 psps30 psps34 psps39 psps41 psps48 psps83

*Men
logit `psmodel' if male==1
predict xbetam , xb

*Women
logit `psmodel' if male==0
predict xbetaf , xb

*Each patient takes the linear index from the model for their own sex
gen xbeta = cond(male==0, xbetaf, xbetam)
*ehat is the predicted probability, lor the log odds computed back from it
gen ehat = exp(xbeta)/(1 + exp(xbeta))
gen lor = ln(ehat/(1 - ehat))

save "$dataOut/PSPats_logitpre.dta" , replace


******************************************************
*AT THIS POINT THE PROPENSITY SCORE HAS BEEN ESTIMATED
*NOW CHOOSING THE MATCHES
******************************************************


*IDENTIFY MATCHES FROM THE PROPENSITY SCORE
*(clear is the documented option of use; replace is not)
use "$dataOut/PSPats_logitpre.dta" , clear

*Preliminaries
*counter is a constant 1 used for group counts; the SPEC built during estimation is rebuilt below
	gen counter=1
	 capture drop SPEC

*Create the group
 *Group: Race - 1 for RTI code 1, 3 for RTI code 5, 2 for every other value including missing
 gen racegroup=RTI_RACE_CD
  replace racegroup=2 if RTI_RACE_CD!=1
  replace racegroup=3 if RTI_RACE_CD==5
 capture drop exactmatch
 *Group: Age - single year of age; above 95 (and for a missing age) the agecat band is used instead
 gen agegroup=age
  replace agegroup=agecat if age>95
 *icdrtf = 1 when any of the listed specialty indicators is set
 egen icdrt = rowtotal(psps02 psps04 psps05 psps07 psps10 psps13 psps16  psps25 psps26 psps29 psps34 psps39 psps41 psps48 psps83 )
 gen icdrtf = icdrt>0
 *Group: Specialty
 egen SPEC=group(fmp psps06  psps18 psps20 psps22  psps30 icdrtf)
 egen exactmatch=group(SPEC male agegroup racegroup )

*Count of all observations (treated and controls) within the group
 egen sumcounter=sum(counter), by(exactmatch)
*Count of treated within the group; groups with no treated patient are dropped
 egen totpats=sum(merger), by(exactmatch )
 drop if totpats==0

*RESHAPE MERGERS WITHIN THE EXACTMATCH
*Builds one row per exactmatch holding the lor of each treated patient as lor1, lor2, and so on
 preserve
 tempfile mlorfile
   keep if merger==1
   keep exactmatch lor  
  sort exactmatch 
   by exactmatch: gen id=_n
 reshape wide lor, i(exactmatch) j(id)
   sort exactmatch
 save `mlorfile', replace
 clear

 *Merge the "acquisitions" back onto the control group file
 restore
 sort exactmatch lor
  merge m:1 exactmatch using `mlorfile'
 drop _merge

 *Calculate absolute distance between acquisition file and control group file
 *The wildcard also matches lor itself, which produces a variable named dist (own distance, zero);
 *that variable is dropped after the collapse below.
 foreach i of varlist lor* {
	local j = subinstr("`i'","lor","",.)
	gen dist`j'=abs(lor-`i')
	di "`j'"
	}

*FIND MINIMUM DISTANCE WITHIN EXACTMATCH
 preserve
 tempfile mindistfile
*Find the minimum, separately for treated rows and control rows of each exactmatch
 collapse (min) dist*, by(exactmatch merger)
	sort exactmatch merger
	drop dist
 
 foreach var of varlist dist* {
	local j = subinstr("`var'","dist","",.)
	rename `var' mindist`j'
	}
 save `mindistfile', replace
 clear
 restore

*Merge the minimum back onto the control group file
 capture drop _merge
  sort exactmatch merger
 merge m:1 exactmatch merger using `mindistfile'
  drop _merge 

*Identify the match
*flagN = 1 when the row sits at the minimum distance to treated patient N (within its own merger status)
	foreach var of varlist mindist* {
		local j = subinstr("`var'","mindist","",.)
		di "`j'"
		gen flag`j' = dist`j' == `var' & dist`j'!=.
	}
 
save "$dataOut/PSPats_lor1.dta" , replace

use "$dataOut/PSPats_lor1.dta" , clear

*SOME CONTROL GROUP PATIENTS ARE MAPPED TO MULTIPLE TREATED PATIENTS
*THE FOLLOWING CODE IS DESIGNED TO MAP THESE BENEFICIARIES TO THE BEST (I.E., HIGHEST PROPENSITY SCORE) TREATED BENEFICIARY 
* AND THEN MATCH TO THE NEXT BENEFICIARY, AND SO ON

*IDENTIFY MULTIPLE MATCHES
*rs = number of treated patients for which this row is the closest one
	egen rs = rowtotal(flag*)

*1. SAVE SINGULAR MATCHES and TREATED PATIENTS
*Keep every treated patient plus the rows flagged for exactly one treated patient
	gen keeper=1 if merger==1
	replace keeper=1 if rs==1
	drop mindist*
	keep if keeper==1
	drop if lor==.  

*use yrflag to identify how long in the sample
	egen samlen=rowtotal(yr2*)

*Match controls to treatments with an id - start by using exactmatch and the treatment lor
*matchlor = lor of the treated patient the row is flagged for
	gen matchlor=.
		foreach i of varlist flag* {
			local j=regexr("`i'","flag","")
			replace matchlor=lor`j' if `i'==1
		}

*Keep only one of multiple controls to the same Treatment (affects 2 patients)
*Order of preference within exactmatch and matchlor: highest lor, then longest time in sample, then BENE_ID value
	gsort exactmatch matchlor merger -lor -samlen BENE_ID 
	by exactmatch matchlor merger: replace keeper=0 if _n>1 & merger==0
	drop samlen
	drop if keeper==0

	keep BENE_ID merger exactmatch totpats sumcounter ehat lor matchlor

*matched = 1 when exactly two rows share the same exactmatch and matchlor
	sort exactmatch matchlor merger
	by exactmatch matchlor: gen matched=_N==2

*SAVE THE FIRST ROUND OF EASY TO MATCH CONTROL GROUPS
   save "$dataOut/PSPats_logit_list.dta" , replace
	clear

*2. SAVE SINGULAR TREATMENTs in an exactmatch (i.e., no potential controls in the exactmatch category) - match later by propensity score
*Get back dataset with non-singular matches
*keep only non-singular matches and non-matches

use "$dataOut/PSPats_lor1.dta" , clear
 egen rs = rowtotal(flag*)
 drop mindist*

*sometimes sumcounter includes controls without estimated lor drop them and recount
 drop if lor==.
 drop sumcounter
 egen sumcounter=sum(counter), by(exactmatch)

*Same number of treatments as observations
 gen keeper=rs==sumcounter
*Number of treatments is greater than number of controls  
*Within each exactmatch and merger status, rows are ordered from highest to lowest lor;
*when treated patients outnumber the rest of the group, rows past the first half of totpats are also kept
 gsort exactmatch merger -lor
 by exactmatch merger: replace keeper=1 if 2*totpats>sumcounter & _n>.5*totpats

 gen sex_pat=BENE_SEX_IDENT_CD
 destring sex_pat , replace

*Age in whole years on 1 January 2005, top-coded at 96.
*The top code mirrors the birthcohort built for the multiple-match controls; the two files are later
*merged on birthcohort, so without it treated patients older than 95 could never find a control.
 gen birthcohort=round((mdy(1,1,2005)-BENE_BIRTH_DT)/365.25,1)
 replace birthcohort=96 if birthcohort>95

*capture drop racegroup 
 capture drop agegroup 
 capture drop icdrt 
 capture drop icdrtf 
 capture drop SPEC 
 capture drop totpats 
 capture drop rs

*this drops the variable merger, but singulars are all merger==1
*The drop is positional: it removes every variable stored from ZIP2005 through urbanrural_3
	 tab merger keeper
	 drop ZIP2005-urbanrural_3
	 tempfile singulars
	 keep if keeper==1
	 drop keeper
	save `singulars', replace
clear

*IDENTIFY TREATMENT PATIENTS THAT ARE IMPACTED
*NOTE THAT THE SOME CONTROL PATIENTS ARE MATCHED TO AS MANY AS 10 TREATED PATIENTS

*Keep only multiple matches
*After these drops only controls remain, flagged for either no treated patient or for two or more
	use "$dataOut/PSPats_lor1.dta" , clear
		egen rs = rowtotal(flag*)
	drop if merger==1
	drop if rs==1
		egen samlen=rowtotal(yr2*)

*Drop those 'exact match' groups without multiple treated patients  
		egen maxrs=max(rs), by(exactmatch)
	drop if maxrs<2
	drop maxrs
preserve

*This is a sample of only multiple matches
	keep if rs > 1

*make it unique by exactmatch
	keep exactmatch flag* lor*

 collapse (max) flag* lor*, by(exactmatch)

*Turn it and keep only records with multiple matched treated patients
*The control's own lor is renamed first so that the reshape only stacks the numbered treated lor variables
		capture rename lor origlor
 reshape long flag lor, i(exactmatch) j(counter)
	keep if flag==1
	drop flag origlor

*Turn it back to merge on... up to 114 multiple matches within an exactmatch:
*Note: a particular line only had 6, but an exactmatch group had as many as 114
*Sort by the highest LOR

	 gsort exactmatch -lor 
	 by exactmatch: gen newcounter=_n
	*should be 114
	 summ newcounter
	 local ct=r(max)
	 gen keeper=1
	 tempfile multiples
	save `multiples', replace

*Create different files for each Treatment LOR
*File number i holds, for every exactmatch, the treated patient with the i-th highest lor
		forval i=1(1)`ct' {
			use `multiples', clear
			 tempfile multiple`i' 
			 keep if newcounter==`i'
			save `multiple`i'', replace
		clear
		}

*Merge progressively by the highest lor - match, then merge. Next, remove matched record from the data and merge again. Repeat. 
restore
		gen birthcohort=age
		 replace birthcohort=96 if age>95
		gen sex_pat=BENE_SEX_IDENT_CD
		 destring sex_pat, replace
	keep BENE_ID lor exactmatch rs merger samlen birthcohort sex_pat racegroup
*keeper = 1 marks a control that is still available; it is set to 0 once the control has been assigned
		gen keeper=1
	sort exactmatch keeper
		rename lor lor_c
		gen ct_t=.
		gen lor_t=.
		gen nc_t=.

		forval i=1(1)`ct' {
			sort exactmatch keeper
		*Merge highest lor by exactmatch
			merge m:1 exactmatch keeper using `multiple`i''
		*Distance
				gen dist=abs(lor_c-lor)
			*Minimum distance
				egen mindist=min(dist), by(exactmatch keeper)
			*Remove match from dataset
				 replace keeper=0 if dist==mindist & _merge==3
			*Keep relevant variables 
				 replace lor_t=lor if keeper==0 & _merge==3
				 replace nc_t=newcounter if keeper==0 & _merge==3
				 replace ct_t=counter if keeper==0 & _merge==3
			*Drop irrelevant variables
			drop dist mindist lor newcounter _merge counter
			} 

		gen em_t=exactmatch if keeper==0

*There were 2 duplicate observations with multiple treatments with the same lor.
*Keep the match that sorts first on BENE_ID (i.e., arbitrary) and release the others back to the unmatched pool.
	sort exactmatch keeper nc_t BENE_ID
		by exactmatch keeper nc_t: gen id=_n
		gen dropvar=keeper==0 & id>1
		 replace keeper=1 if dropvar==1
	drop dropvar id 

*Rows with an empty BENE_ID are treated-only rows brought in by the merges above; they are not controls
	sort keeper birthcohort sex_pat racegroup
	drop if BENE_ID==""

*Done with multiple matches
 save "$dataOut/PSPats_multi_matches.dta" , replace
clear

*Merge singulars (i.e., treatments with no control group bene's within exactmatch) onto the rest of the file
 use `singulars', clear
		capture gen keeper=1
*Essentially keep lor, exactmatch and that's it - and turn it wide: 1 record dataset with all the singular lor's wide
	drop lor1-flag1 flag* xbeta* sumcounter counter ehat BENE_ID 
		rename lor logodds
	gsort birthcohort sex_pat -logodds
	 by birthcohort sex_pat : gen id=_n
*Turning it wide (keeping exactmatch to create a matcher id later - not using it for match)
	reshape wide exactmatch logodds, i(keeper birthcohort sex_pat racegroup ) j(id)

*Merge onto "rest" of the file
*keeper is 1 on every singular row, so only controls that are still unassigned can pair with them
merge 1:m keeper birthcohort sex_pat racegroup using "$dataOut/PSPats_multi_matches.dta"
*Set aside the singular rows that found no control in their age-sex-race cell
	preserve
	tempfile nomerge
	keep if _merge==1
	keep keeper birthcohort sex_pat logodds*
	local i=0
		foreach var of varlist logodds* {
			local i=`i'+1
			rename `var' lo_`i'
		}

	save `nomerge', replace
	clear

	restore

*k is 1 when at least one singular row failed to merge
tab _merge if _merge==1
local k=r(r)

drop if _merge==1
*Identify closest match (remember, this is now within cohort-sex rather than exactmatch - using logodds ratio)
*Loops over set of "singular" treatments
		foreach i of varlist logodds* {
	*Strips off number
			local j=regexr("`i'","[A-Za-z]+","")
			di "`j'"
			*Creates distance
			gen dist`j'=abs(`i'-lor_c)
			*Creates minimum distance
			egen mindist`j'=min(dist`j'), by(birthcohort sex_pat racegroup)
			*Chooses control where distance=minimum distance
			gen flag`j'=dist`j'==mindist`j' & _merge==3
			 replace flag`j'=0 if dist`j'==. & _merge==3
			 replace lor_t=`i' if flag`j'==1  & _merge==3
			 replace em_t=exactmatch`j' if flag`j'==1 & _merge==3
		}

*If some treatments didn't merge onto age-sex-race, do it only for age-sex

	if `k'==1 {
		drop _merge

 *Mark every control chosen in the first pass as assigned so it cannot be picked again below
		egen flagflag=rowtotal(flag*) 
		 replace keeper=0 if flagflag>0

 *Do it again, minus race for the non-merge treatments (race=3 is sparse among the aged)
	merge m:1 keeper birthcohort sex_pat using `nomerge'
			foreach i of varlist logodds* {
			*Strips off number
				local j=regexr("`i'","[A-Za-z]+","")
				di "`j'"
				 replace `i' = lo_`j' if _merge==3
			*Creates distance
				 replace dist`j'=abs(`i'-lor_c)
			*Creates minimum distance
				egen mindist_`j'=min(dist`j'), by(keeper birthcohort sex_pat )
			*Chooses control where distance=minimum distance
			 	  replace flag`j'=dist`j'==mindist_`j' & _merge==3
				  replace flag`j'=0 if dist`j'==. & _merge==3
				  replace lor_t=`i' if flag`j'==1  & _merge==3
				  replace em_t=exactmatch`j' if flag`j'==1 & _merge==3
				}

		drop flagflag
			}

		egen flagflag=rowtotal(flag*) 
		 replace keeper=0 if flagflag>0

*Cleanup
*drop the turned exactmatches (numbered) but keep the own one
	ds exactmatch*
		unab vl : `r(varlist)'
		unab exclude : exactmatch
		local varlist : list vl - exclude

		drop logodds* dist* mindist* flag* _merge `varlist'
*The lo_ variables only exist when the age-sex fallback above ran, so their removal must not be fatal
		capture drop lo_*
		keep if keeper==0
		drop keeper

*All of the "hard to match" matches are done
tempfile multimatches
save `multimatches', replace

*Add hard to match matches to easy-to-match matches
 append using "$dataOut/PSPats_logit_list.dta"
*Appended rows carry matchlor, exactmatch and lor; rows already in memory carry lor_t, em_t and lor_c.
*Bring both sets onto the same three variables.
	replace lor_t = matchlor if matchlor != .
	replace em_t = exactmatch if em_t == .
	replace lor = lor_c if lor == .

*Create the "matchid" using the prop score and the exactmatch id
	replace lor_t = 0 if lor_t == .
	egen id = group(em_t lor_t)
*There are a few observations with exactmatch-propscore duplicates
*cnt numbers the rows within id and treatment status, in BENE_ID order
	bysort id merger (BENE_ID): gen cnt = _n

*Create alternative id to deal with duplicates - matched based on BENe_Id (see code above)
	egen id_alt = group(id cnt)

*Drop the one overmatched control
*counter is the number of rows sharing an id_alt; 1 means the row has no partner
	bysort id_alt (merger): gen counter = _N
	drop if BENE_ID == "8888888X2XWeeWe" & counter==1 

*Create alternative id to deal with duplicates - matched based on BENe_Id (see code above)
*Rows without a partner get a new id above the current maximum, numbered by their lor_t
	drop id_alt
	egen maxid = max(id)
	egen id_alt = group(lor_t)
	gen newid = cond(counter==1, maxid+id_alt, id)

	drop  counter matchlor lor_c matched rs samlen ct_t nc_t ehat cnt id sumcounter totpats sex_pat birthcohort id_alt racegroup male maxid
	rename newid id
 save "$dataOut/PSPats_logit_final.dta", replace


